****************************************************
*** BUILD DATASET FOR HOSPITAL-LEVEL REGRESSIONS ***
*** Last edited: 4/6/2023         		   		 ***
****************************************************

// Open the build log (file name carries the run date via $S_DATE).
// Close any log left open by an earlier, interrupted run first: -log using- otherwise
// stops with r(604) "log file already open". -capture- makes this a no-op when no log is open.
capture log close
log using "${CodePath}/log/buildhospitalyear_$S_DATE.log", text replace


*--------------------------------------------------
* Merge and clean data
*--------------------------------------------------

// Load HCRIS data -- downloaded and cleaned using Adam Sacarny's HCRIS files (https://github.com/asacarny/hospital-cost-reports) on 12/12/2020
// This hospital-fiscal-year file is the master panel that everything below is merged onto.
	use "${DataPath}/hcris/hcris_merged_hospfyear.dta", clear

	// Predicted audit rate based on 2007-2009 claims -- available in replication.
	// The provider id arrives as a string; keep purely numeric ids and convert them to
	// the numeric key pn in a scratch copy, then attach by pn.
	preserve
		use "${DataPath}/predictions/byprovider0709_audit_in2011.dta", clear
		generate nonnum = missing(real(provider))
		drop if nonnum == 1
		destring provider, generate(pn)
		tempfile temp
		save `temp'
	restore

	merge m:1 pn using `temp', nogenerate
	rename pred_0709_m53_audit_in2011 pred_0709_aud2011

	// cost-centre detail that is not used downstream
	drop ipcost* anccost* opcost* othreimbcost* specialcost* nonreimbcost*

	// Hospital name and state: take the 2010-format file first, then let the 1996-format
	// file fill in values that are still missing (-update- never overwrites nonmissing values).
	merge m:1 pn using "${DataPath}/hcris/hcrishospitals_10.dta", nogenerate
	merge m:1 pn using "${DataPath}/hcris/hcrishospitals_96.dta", update nogenerate


// Actual audit rate data -- long (one row per provider and fiscal year)
	preserve
		use "${DataPath}/claims/auditrates_0716.dta", clear
		generate nonnum = missing(real(provider))
		drop if nonnum == 1
		destring provider, generate(pn)

		// add all-zero columns for fiscal years 2000-2009 so the reshape yields those years too
		forvalues yr = 2000/2009 {
			foreach stub in auditrate_in auditrate_by overpaidrate_in overpaidrate_by {
				generate `stub'`yr' = 0
			}
		}

		reshape long auditrate_in overpaidrate_in auditrate_by overpaidrate_by, i(provider) j(fyear)
		drop provider nonnum

		// audit rate 1, 2 and 3 years back within provider; the first k years of the
		// panel have no k-year lag available and are set to 0
		forvalues k = 1/3 {
			bysort pn (fyear): generate auditrate_min`k' = auditrate_in[_n-`k']
		}
		forvalues k = 1/3 {
			replace auditrate_min`k' = 0 if inrange(fyear, 2000, 1999 + `k')
		}

		// rescale by 100 (lags were copied before rescaling, so they are rescaled here as well)
		foreach v of varlist auditrate_in auditrate_by auditrate_min1 auditrate_min2 auditrate_min3 {
			replace `v' = `v' * 100
		}

		label variable auditrate_in   "audit rate, this year"
		label variable auditrate_min1 "audit rate, last year"
		label variable auditrate_min2 "audit rate, 2 years ago"
		label variable auditrate_min3 "audit rate, 3 years ago"
		label variable auditrate_by   "share of claims audited by curr. year"

		tempfile temp
		save `temp'
	restore
	// Matched rows only. Dropped hospitals in HCRIS that are medical centers (not hospitals), rehab hospitals,
	// psych hospitals, and hospitals that closed before auditing started
	merge 1:1 pn fyear using `temp', keep(match) generate(_auditratefyearmerge)

// Audit demand rate data (auditdemandrates_0716) -- wide: one row per provider,
// year-specific demandrate_in and demandoverrate_in columns attached to every fiscal year
	preserve
		use "${DataPath}/claims/auditdemandrates_0716.dta", clear
		generate nonnum = missing(real(provider))
		drop if nonnum == 1
		destring provider, generate(pn)

		keep demandrate_in* demandoverrate_in* pn
		tempfile temp
		save `temp'
	restore
	// keep every master row; providers found only in the demand-rate file are discarded
	merge m:1 pn using `temp', keep(master match) generate(_auditratemerge)

// Actual audit demand rate (percent of claims that are demanded), long
	preserve
		use "${DataPath}/claims/auditdemandrates_0716.dta", clear
		generate nonnum = missing(real(provider))
		drop if nonnum == 1
		destring provider, generate(pn)

		// all-zero columns for fiscal years 2000-2009 so those years exist after the reshape
		forvalues yr = 2000/2009 {
			foreach stub in demandrate_in demandoverrate_in {
				generate `stub'`yr' = 0
			}
		}

		reshape long demandrate_in demandoverrate_in, i(provider) j(fyear)

		// rescale by 100
		foreach v of varlist demandrate_in demandoverrate_in {
			replace `v' = `v' * 100
		}

		drop provider nonnum
		tempfile temp
		save `temp'
	restore
	// keep every master row; hospital-years found only in the demand-rate file are discarded
	merge 1:1 pn fyear using `temp', keep(master match) generate(_demandratefyearmerge)

// Actual audit rates (auditrates_0716) -- wide: one row per provider. The year-specific
// auditrate_in columns are renamed audit_in so they do not clash with the long
// auditrate_in variable already in the panel.
	preserve
		use "${DataPath}/claims/auditrates_0716.dta", clear
		generate nonnum = missing(real(provider))
		drop if nonnum == 1
		destring provider, generate(pn)

		keep auditrate_in* overpaidrate_in*  pn
		rename auditrate_in* audit_in*
		tempfile temp
		save `temp'
	restore
	merge m:1 pn using `temp', keep(master match) generate(_demandratemerge)

	// demand rate relative to audit rate: current year, then each fixed year 2010-2015
	// (the result is missing wherever the audit rate is 0 or missing)
	generate demandafterauditrate_in = demandrate_in / auditrate_in
	label variable demandafterauditrate_in "percent demanded after audit in this year"
	forvalues yr = 2010/2015 {
		generate demandafterauditrate_in`yr' = demandrate_in`yr' / audit_in`yr'
		label variable demandafterauditrate_in`yr' "percent demanded after audit in `yr'"
	}
// Actual audit demand amount -- available in replication packet
	preserve
		use "${DataPath}/Audit Data/demandamts_0915.dta", clear
		// align key names with the master panel
		rename (provider audit_fyear) (pn fyear)
		tempfile temp
		save `temp'
	restore

	// keep every master row; hospital-years found only in the demand-amount file are discarded
	merge 1:1 pn fyear using `temp', keep(master match) generate(_demandamtmerge)
	// fiscal years up to and including 2009 are set to a zero demand amount
	replace DemandAmt_3y = 0 if fyear <= 2009


 // Load discharge data: claim counts per hospital-fiscal year, overall and by length of stay (LOS),
 // plus each LOS bin's share of all claims
	preserve
		use "${DataPath}/claims/n_claims_fyear9815.dta", clear
		// sum the counts to one row per provider-fiscal year (gcollapse comes from the gtools package)
		gcollapse (sum) n_claims n_claims_*, by(fyear prvdrnum)
		// keep purely numeric provider numbers and convert them to the numeric merge key pn
		gen nonnum = real(prvdrnum) == .
		drop if nonnum
		destring prvdrnum, gen(pn)
		keep pn n_claims fyear n_claims*
		label variable n_claims "n discharges"
		// single-day LOS bins 0..20
		forval losnum = 0/20{
			label variable n_claims_`losnum' "n los = `losnum' discharges"
			gen sh_claims_`losnum' = n_claims_`losnum'/n_claims
			// FIX: the label used an undefined local macro (los), which expanded to nothing
			// and left every share label ending in "los = "; use the loop macro instead
			label variable sh_claims_`losnum' "share inpatient claims los = `losnum'"
		}
		// grouped LOS bins: 21-50, 51-100 and 101+
		label variable n_claims_2150 "n los = 21-50 discharges"
		gen sh_claims_2150 = n_claims_2150/n_claims
		label variable sh_claims_2150 "share inpatient claims los = 21-50"

		label variable n_claims_51100 "n los = 51-100 discharges"
		gen sh_claims_51100 = n_claims_51100/n_claims
		label variable sh_claims_51100 "share inpatient claims los = 51-100"

		label variable n_claims_101 "n los > 100 discharges"
		gen sh_claims_101 = n_claims_101/n_claims
		label variable sh_claims_101 "share inpatient claims los = 101+"

		keep if fyear >= 1998 & fyear <= 2015
		tempfile temp
		save `temp', replace
	restore  

	// attach to the panel; hospital-years found only in the claims file are discarded
	merge m:1 pn fyear using `temp'
	rename _merge _dischLOSmerge
	keep if _dischLOSmerge == 1 | _dischLOSmerge == 3


// Load payment data by LOS: totals and per-claim means of charges, payments and DRG prices,
// separately for stays of 0-2 days (suffix _02) and for longer stays (suffix _over02)
	preserve
		use "${DataPath}/claims/medparchars_byLOSbin_fyear9915.dta", clear
		keep if fyear <= 2015 & fyear >= 2002
		// keep purely numeric provider numbers and convert them to the numeric merge key pn
		gen nonnum = real(prvdrnum) == .
		drop if nonnum
		destring prvdrnum, gen(pn)

		// All-LOS totals across the bin variables.
		// FIX: these row totals must be taken BEFORE the _02 sums exist. Previously they were
		// computed afterwards, so the wildcards also matched tot_totchrg_02, tot_pmt_amt_02,
		// tot_drgprice_02 and n_claims_02; LOS 0-2 was counted twice and every _over02
		// variable equalled the all-LOS total rather than the LOS > 2 total.
		egen tot_totchrg = rowtotal(tot_totchrg_*)
		egen tot_pmt_amt = rowtotal(tot_pmt_amt_*)
		egen tot_drgprice = rowtotal(tot_drgprice_*)
		egen n_claims = rowtotal(n_claims_*)

		// LOS 0-2 sums (missing if any of the three bins is missing)
		gen tot_totchrg_02 = tot_totchrg_0 + tot_totchrg_1 + tot_totchrg_2
		gen tot_pmt_amt_02 = tot_pmt_amt_0 + tot_pmt_amt_1 + tot_pmt_amt_2
		gen tot_drgprice_02 = tot_drgprice_0 + tot_drgprice_1 + tot_drgprice_2
		gen n_claims_02 = n_claims_0 + n_claims_1 + n_claims_2

		// LOS > 2 = all-LOS total minus the LOS 0-2 sum
		gen tot_totchrg_over02 = tot_totchrg - tot_totchrg_02
		gen tot_pmt_amt_over02 = tot_pmt_amt - tot_pmt_amt_02
		gen tot_drgprice_over02 = tot_drgprice - tot_drgprice_02
		gen n_claims_over02 = n_claims - n_claims_02

		// per-claim means within each LOS group
		gen mean_totchrg_02 = tot_totchrg_02 / n_claims_02
		gen mean_pmt_amt_02 = tot_pmt_amt_02 / n_claims_02
		gen mean_drgprice_02 = tot_drgprice_02 / n_claims_02

		gen mean_totchrg_over02 = tot_totchrg_over02 / n_claims_over02
		gen mean_pmt_amt_over02 = tot_pmt_amt_over02 / n_claims_over02
		gen mean_drgprice_over02 = tot_drgprice_over02 / n_claims_over02

		// keep only the _02 / _over02 measures and the merge keys; the claim counts are
		// dropped because the panel gets its counts from the discharge file
		keep *02 pn fyear
		drop n_claims*
		// log of every remaining total and mean
		foreach var of varlist mean_* tot_*{
			gen log_`var' = ln(`var')
			local lab: variable label `var'
			label var log_`var' "log `lab'"			
		}

		tempfile temp
		save `temp'
	restore

		// attach to the panel; hospital-years found only in the using file are discarded
		merge m:1 pn fyear using `temp', keep(1 3) gen(_dischlosmerge2)


// Load MEDPAR summary data and add the log of every mean_ and tot_ measure
	preserve
		use "${DataPath}/claims/medparchars_fyear9915.dta", clear
		generate nonnum = missing(real(prvdrnum))
		drop if nonnum == 1
		destring prvdrnum, generate(pn)

		// fiscal years through 2015 only (a missing fyear sorts above 2015 and is dropped too)
		drop if fyear > 2015

		foreach measure of varlist mean_* tot_* {
			local lbl : variable label `measure'
			generate log_`measure' = ln(`measure')
			label variable log_`measure' "log `lbl'"
		}

		tempfile temp
		save `temp'
	restore
	// keep every master row; hospital-years found only in the MEDPAR file are discarded
	merge m:1 pn fyear using `temp', keep(master match) generate(_medparsummerge)


// Load 100 percent emergency and claims data, with overlapping IP/OP claims dropped
	preserve
		use "${DataPath}/emergencyvisits/n_ip_emergencyvisits_nooverlaps_fy_0715.dta", clear
		// purely numeric provider numbers only, converted to the numeric merge key pn
		drop if missing(real(prvdrnum))
		destring prvdrnum, generate(pn)

		// tag the ED measures as coming from the no-overlap sample
		rename (n_visits n_ip p_ip) (n_ED_nooverlap n_ip_ED_nooverlap p_ip_nooverlap)
		// outpatient ED visits = all ED visits minus the inpatient ones
		generate n_op_ED_nooverlap = (n_ED_nooverlap - n_ip_ED_nooverlap)

		tempfile temp
		save `temp'
	restore

	// keep every master row; hospital-years found only in the ED file are discarded
	merge m:1 pn fyear using `temp', keep(master match) generate(_emergency100pct_nooverlapmerge)


	// CMS IPPS impact file, 2010 -- downloaded from https://www.nber.org/research/data/centers-medicare-medicaid-services-cms-impact-file-hospital-ipps on 5/22/2020 (also available in replication packet)
	// Providers found only in the impact file are discarded.
	merge m:1 pn using "${DataPath}/impact2010_clean.dta", keepusing(provname ssacounty urgeo wageindex adc opccr cpccr ptype bills cmiv beds mcr_pct) keep(master match) nogenerate
	// mark the time-invariant impact-file measures as 2010 values
	rename (bills cmiv beds mcr_pct wageindex) (bills_2010 cmiv_2010 beds_2010 mcr_pct_2010 wageindex_2010)


	// Provider of services file, 2010 -- downloaded from https://www.nber.org/research/data/provider-services-files on 5/19/2019 (also available in replication packet)
	// NOTE(review): unlike the impact-file merge, this one keeps providers that appear only in the POS file.
	merge m:1 pn using "${DataPath}/pos2010_clean.dta", keepusing(city state zip medaffil shortterm cah provider_subtype nonprofit forprofit govt) nogenerate


// Merge HRR-HSA -- downloaded from Dartmouth Atlas (https://data.dartmouthatlas.org/supplemental/) on 9/11/2019
	preserve
		use "${DataPath}/hosp_hsa_hrr-combined.dta", clear
		rename provider pn
		tempfile temp
		save `temp'
	restore
	// only the two region identifiers are brought in; providers found only in the crosswalk are discarded
	merge m:1 pn using `temp', keepusing(hrrnum hsanum) keep(master match) generate(_hrrmerge)

// Merge in PCE CPI to deflate -- downloaded from FRED https://fred.stlouisfed.org/series/PCEPI (annual, with aggregation method = average) on 5/5/2020
	preserve
		import delimited "${DataPath}/PCEPI.csv", clear
		// the observation date is a YMD string; its calendar year is used as the fiscal-year key
		generate fyear = year(date(date, "YMD"))
		keep fyear pcepi
		tempfile pcepi
		save `pcepi'
	restore
	// matched years only: rows whose fyear has no price index (including a missing fyear) are dropped
	merge m:1 fyear using `pcepi', keep(match) nogenerate

// Deflated totals used as denominators for the cost shares computed in the loop below.
// NOTE(review): all three are built from totcost_net. Confirm whether real_tot_sal and
// real_tot_oth were meant to use totcost_sal and totcost_oth instead.
gen real_tot_net= totcost_net*100/pcepi
gen real_tot_sal= totcost_net*100/pcepi
gen real_tot_oth= totcost_net*100/pcepi


// For every cost, revenue and adjustment variable: deflate by the price index, build log /
// inverse-hyperbolic-sine / log(1+x) transforms, and compute shares of the relevant total.
// Every -gen- runs under -capture-, so a variable that already exists (the patterns overlap,
// e.g. netpatrev is also matched by the rev wildcard) or whose inputs are absent is skipped silently.
foreach var of varlist  *_net *_sal *_oth *_tot opalloc* totcost netpatrev othinc *rev uccare_cost_harmonized *_adj{

		// price-adjusted value (nominal value times 100 over the price index)
		capture gen real_`var'= `var'*100/pcepi
		if strpos("`var'", "_adj") == 0{ // Most adjustments are negative
			capture gen log_real_`var' = ln(real_`var')
		}
		// inverse hyperbolic sine and log(1 + x) transforms of the real value
		capture gen ihs_real_`var' = ln(real_`var' + sqrt(real_`var'^2 + 1))
		capture gen l1_real_`var'  = ln(1 + real_`var')

		// variable name with its last four characters removed (e.g. the _adj suffix)
		local var_short = substr("`var'", 1, length("`var'")-4)
		di "`var_short'"
		// Calculate shares for cost vars
		// (skips the totals themselves and anything that is a revenue, log, opalloc or _tot variable)
		if !inlist("`var'", "totcost", "totcost_net", "totcost_sal", "totcost_oth", "othinc", "real_tot_net", "real_tot_sal", "real_tot_oth", "uccare_cost_harmonized") & strpos("`var'", "rev") == 0 & strpos("`var'", "log") == 0 & strpos("`var'", "opalloc") == 0 & strpos("`var'", "_tot") == 0{
			// the denominator is chosen by the suffix found in the variable name
			if strpos("`var'", "_sal"){
				capture gen share_`var' = real_`var'/real_tot_sal
			}
			if strpos("`var'", "_oth"){
				capture gen share_`var' = real_`var'/real_tot_oth
			}
			if strpos("`var'", "_net"){
				capture gen share_`var' = real_`var'/real_tot_net
			}
			// Calculates what share of total cost are adjusted 
			// NOTE(review): the denominator is the salary component MINUS the other component;
			// confirm whether the sum of the two was intended.
			if strpos("`var'", "_adj") & strpos("`var'", "othcap") == 0 & strpos("`var'", "building") == 0 & strpos("`var'", "equipold") == 0{
				capture gen shtc_`var' = -real_`var'/(real_`var_short'_sal - real_`var_short'_oth)
			}
		}

		// Calculate shares for revenue vars
		// (share of real_tottotrev, for every revenue variable except netpatrev and tottotrev)
		if strpos("`var'", "rev") > 0 & !inlist("`var'", "netpatrev", "tottotrev") & strpos("`var'", "log") == 0{
			capture gen share_`var' = real_`var'/real_tottotrev
		}
}
// Create the "_win" admin cost series. Despite the name this trims rather than winsorizes:
// any hospital that EVER has real admin costs above the pooled 99th percentile or below the
// pooled 1st percentile has its admin costs set to missing in every year.
summarize real_gcost_admin_net, detail
local cut_hi = r(p99)
local cut_lo = r(p1)
display "p99 = `cut_hi'"
display "p01 = `cut_lo'"

// flag outlying hospital-years (missing costs are never flagged), then spread the flag within hospital
generate outlieradmin = !missing(real_gcost_admin_net) & (real_gcost_admin_net > `cut_hi' | real_gcost_admin_net < `cut_lo')
bysort pn: egen any_outlieradmin = max(outlieradmin)

generate real_gcost_admin_net_win = real_gcost_admin_net if any_outlieradmin != 1
generate log_real_gcost_admin_net_win = ln(real_gcost_admin_net_win)

drop outlieradmin any_outlieradmin


// Label HCRIS outcome variables of interest
// Note that the salary / other components of administrative costs are *before* adjustments,
// so they are inaccurate.
label var real_gcost_admin_net          "real admin costs"
label var log_real_gcost_admin_net      "log real admin costs"
label var real_gcost_admin_net_win      "real admin costs (winsorized)"
label var log_real_gcost_admin_net_win  "log real admin costs (winsorized)"
label var share_gcost_admin_net         "share real admin costs"
label var real_gcost_admin_sal          "real admin salary costs"
label var real_gcost_admin_oth          "real admin other costs"
label var log_real_gcost_admin_sal      "log real admin salary costs"
label var log_real_gcost_admin_oth      "log real admin other costs"


// Calculate log claims: (re)build the log of every claim-count and ED-count variable,
// replacing any log_ version that already exists
foreach count of varlist n_claims* n_ip_* n_op_* {
	capture drop log_`count'
	generate log_`count' = ln(`count')
}


// Merge in Cooper et al mergers data -- downloaded from https://healthcarepricingproject.org/papers/paper-1 on 5/27/2020
// Hospital-years found only in the mergers file are discarded.
	merge 1:1 pn fyear using "${DataPath}/mergerdata.dta", keep(master match) nogenerate

	// What system was this hospital part of in 2010?
	// Start from the 2010 row, carry the value forward to later years, then reverse the
	// year order and carry it "forward" again so that earlier years are filled as well.
	generate sysid_10 = sysid if fyear == 2010
	sort pn fyear
	bysort pn: replace sysid_10 = sysid_10[_n-1] if missing(sysid_10)
	gsort pn -fyear
	bysort pn: replace sysid_10 = sysid_10[_n-1] if missing(sysid_10)
	// restore chronological order
	sort pn fyear


// Numeric (value-labelled) versions of the string state and urban/rural codes
encode state, gen(state_en)
encode urgeo, gen(urgeo_en)

// Ownership type from the POS indicators nonprofit / forprofit / govt.
// FIX: a bare "if nonprofit" is TRUE when the indicator is missing, so hospitals with no
// ownership information passed all three assignments and ended up labelled "govt".
// The !missing() guards leave hospprofittype missing for them instead.
gen     hospprofittype = 1 if nonprofit & !missing(nonprofit)
replace hospprofittype = 2 if forprofit & !missing(forprofit)
replace hospprofittype = 3 if govt      & !missing(govt)

label define hospprofittype_l 1 "nonprofit" 2 "forprofit" 3 "govt"
label values hospprofittype hospprofittype_l

// Teaching status: medaffil 1-3 = teaching, 4 = not teaching; any other value stays missing
gen     teaching = 0 if medaffil == 4
replace teaching = 1 if medaffil >= 1 & medaffil <= 3

// Urban indicator from the impact-file geography code.
// FIX: urgeo != "RURAL" is also true for an empty urgeo (hospital not in the impact file),
// which coded those hospitals as urban; they are now left missing.
gen     urban = 0 if urgeo == "RURAL"
replace urban = 1 if urgeo != "RURAL" & !missing(urgeo)

label define urban_l 1 "urban" 0 "rural"
label values urban urban_l

// value label for the independent/system indicator (-cap- in case it is already defined)
cap label define independent_l 1 "independent" 2 "system"

// RAC region by state. Each region is split over two statements because inlist()
// accepts at most 10 string arguments. (A duplicated "MI" in region 2 was removed.)
gen     RACregion = 1 if inlist(state, "ME", "VT", "NH", "MA", "CT", "RI", "NJ", "NY", "PA")
replace RACregion = 1 if inlist(state, "MD", "DE")
replace RACregion = 2 if inlist(state, "MN", "WI", "MI", "IL", "IN", "OH", "KY")
replace RACregion = 3 if inlist(state, "WV", "VA", "NC", "SC", "TN", "GA", "AL", "MS", "FL")
replace RACregion = 3 if inlist(state, "LA", "AR", "OK", "TX", "NM", "CO", "DC", "PR")
replace RACregion = 4 if inlist(state, "ND", "SD", "IA", "MO", "KS", "NE", "MT", "WY", "ID")
replace RACregion = 4 if inlist(state, "UT", "AZ", "NV", "WA", "OR", "CA", "AK", "HI")

label define RACregion_l 1 "A" 2 "B" 3 "C" 4 "D"
label values RACregion RACregion_l
label variable RACregion "RAC region"



// Short stays (LOS 0-2): count, log count and share of all discharges
generate n_claims_02 = n_claims_0 + n_claims_1 + n_claims_2
label var n_claims_02 "n los = 0-2 discharges"
generate log_n_claims_02 = ln(n_claims_02)
label var log_n_claims_02 "log n los = 0-2 discharges" 

generate sh_claims_02 = n_claims_02 / n_claims
label var sh_claims_02 "share inpatient claims los = 0-2"

// Longer stays (LOS > 2): everything that is not in the 0, 1 or 2 day bins
generate n_claims_over02 = n_claims - n_claims_0 - n_claims_1 - n_claims_2
generate log_n_claims_over02 = ln(n_claims_over02)
label var n_claims_over02 "n los > 2 discharges"
label var log_n_claims_over02 "log n los > 2 discharges"

generate sh_claims_over02 = n_claims_over02 / n_claims
label var sh_claims_over02 "share inpatient claims los > 2"


// HCRIS reporting - 1996 vs 2010
// share of reports in the 2010 format, and each hospital's fiscal-year-2011 value of
// that share copied to all of its years
generate sharefmt_10 = nfmt10 / (nfmt96 + nfmt10)
tempvar fmt2011
generate `fmt2011' = sharefmt_10 if fyear == 2011
bysort pn: egen sharefmt_10_2011 = mean(`fmt2011')
drop `fmt2011'


// merge in observation stay + long outpatient stays from ED
preserve
	use "${DataPath}/observation/combined_obs_longop_claimlevel_0715_year.dta", clear
	// keep purely numeric provider numbers and convert them in place to the numeric key pn
	gen nonnum = real(prvdrnum) == .
	drop if nonnum
	rename prvdrnum pn
	destring pn, replace
	rename year fyear
	tempfile temp
	save `temp'
restore
// FIX: keep(1 3) added. This was the only fyear-level merge without it, so hospital-years
// that exist only in the observation-stay file were appended to the panel as rows with
// every HCRIS variable missing.
merge m:1 pn fyear using `temp', keep(1 3) gen(_ED_obs_longop)


// observation / long outpatient ED stays relative to all (no-overlap) ED visits
gen obs_oplongED_ratio = obs_oplongED/n_ED_nooverlap

// HIMSS application data -- downloaded via Dorenfest Institute (https://foundation.himss.org/Dorenfest) on 1/30/2021
// Only the application variables with suffix _136 are used.
preserve
	use "${DataPath}/HAEntityApplication_wide.dta", clear
	// align key names with the master panel
	rename (medicarenumber year) (pn fyear)
	keep pn fyear *_136
	// row sum and number of missing values across the app_contracting_ variables
	egen n_contracting      = rowtotal(app_contracting_*)
	egen n_contracting_miss = rowmiss(app_contracting_*)
	tempfile temp
	save `temp'
restore
// keep every master row; hospital-years found only in the HIMSS file are discarded
merge m:1 pn fyear using `temp', keep(master match) generate(_himsmerge)

// generate hospital characteristics in 2010 for heterogeneity analysis
	// teaching, independent, medical-necessity software contracted:
	// each hospital's fiscal-year-2010 value, copied to all of its years
	foreach trait of varlist teaching independent app_contracted_136 {
		capture drop `trait'_2010
		tempvar in2010
		generate `in2010' = `trait' if fyear == 2010
		bysort pn: egen `trait'_2010 = mean(`in2010')
		drop `in2010'
	}


label define teaching_l 0 "not teaching" 1 "teaching" 
label values teaching_2010 teaching_l

label define app_contracted_136_l 0 "no med. necc. software" 1 "has med. necc. software" 
label values app_contracted_136_2010 app_contracted_136_l

// dummy variable for whether contracted for med necc between 2011-2015
	// 1) identify who did and did not implement tech between 2010 and 2015
	// live status in fiscal year 2010, copied to all years of the hospital
	tempvar live2010
	generate `live2010' = app_live_136 if fyear == 2010
	bysort pn: egen app_live_136_2010 = min(`live2010')
	drop `live2010'

	// live, contracting or contracted in fiscal year 2015, copied to all years of the hospital
	tempvar any2015
	generate `any2015' = inlist(1, app_live_136, app_contracting_136, app_contracted_136) if fyear == 2015
	bysort pn: egen app_livecontract_136_2015 = min(`any2015')
	drop `any2015'

// hospitals that installed med necc between 2010 and 2015:
// not live in 2010 but live/contracting/contracted in 2015 (0 whenever either piece is missing)
generate install_app_136_1115 = (app_livecontract_136_2015 == 1) & (app_live_136_2010 == 0)
drop app_livecontract_136_2015 

label define install_app_136_1115_l 0 "no med. necc. install, 11-15" 1 "install med. necc., 11-15" 
label values install_app_136_1115 install_app_136_1115_l
label define abovemedian_l 0 "below median" 1 "above median" 

// above vs. below median short stay share, measured in fiscal year 2010 and copied to
// every year of the hospital
	foreach var of varlist sh_claims_02{
		cap drop `var'_2010
		// median of the 2010 cross-section
		sum `var' if fyear == 2010, detail
		local median = r(p50)
		// FIX: a missing share compares as larger than any number, so hospitals with no
		// 2010 share used to be coded "above median". The indicator is now defined only
		// where the 2010 share is observed and is missing otherwise.
		gen temp2_`var' = (`var' >= `median') if fyear == 2010 & !missing(`var')

		bys pn: egen `var'_2010 = mean(temp2_`var')
		drop temp2_`var'
		label values `var'_2010 abovemedian_l
	}



// merge in CERT error DRG claims -- this is based on the 2010 CERT report (https://www.cms.gov/research-statistics-data-and-systems/monitoring-programs/medicare-ffs-compliance-programs/cert/cert-reports?page=0) accessed on 3/13/2019
preserve
	// Reference: the top-20 error DRG groups (separate base DRGs, elective vs. not).
	// These are the DRG ranges from the retired -gen drg_toperrorK- definitions that used to
	// sit here (presumably how the drg_toperror counts in the input file were built -- confirm).
	// "ELECTIVE" is the flag from the accompanying notes.
	//    1  major joint replacement                         469-470   ELECTIVE
	//    2  permanent cardiac pacemaker                     242-244   ELECTIVE
	//    3  drug-eluting stent                              246-247
	//    4  sepsis                                          871-872
	//    5  chest pain                                      313
	//    6  GI hemorrhage                                   377-379
	//    7  major bowel procedures                          329-331   ELECTIVE
	//    8  respiratory infections                          177-179
	//    9  esophagitis and other GI disorders              391-392   ELECTIVE
	//   10  kidney and UTI                                  689-690
	//   11  nutritional and metabolic disorders             640-641   ELECTIVE
	//   12  renal failure                                   682-684   (old label/notes said 291-293)
	//   13  syncope and collapse                            312
	//   14  heart failure and shock                         291-293
	//   15  cardiac arrhythmia                              308-310   (old label/notes said 308-309)
	//   16  pneumonia and pleurisy                          193-195
	//   17  acute myocardial infarction (AMI)               280-282
	//   18  chronic obstructive pulmonary disease (COPD)    190-192
	//   19  hip and femur except major joint                480-482   ELECTIVE
	//   20  intracranial hemorrhage or cerebral infarction  064-066

	use "${DataPath}/claims/medpar_top20error_fyear9914.dta", clear
	// purely numeric provider numbers only; the numeric key pn replaces the string id
	drop if missing(real(prvdrnum))
	generate pn = real(prvdrnum)
	drop prvdrnum

	// strip the "(sum) " prefix from each DRG count's label and add a logged copy
	foreach drgvar of varlist drg_* {
		local lbl : variable label `drgvar'
		local lbl : subinstr local lbl "(sum) " ""
		label variable `drgvar' "`lbl'"
		generate l_`drgvar' = ln(`drgvar')
		label variable l_`drgvar' "log `lbl'"
	}
	generate l_n_claims_notop20 = ln(n_claims_notop20)
	label variable l_n_claims_notop20 "log n non-top20 error drgs"

	generate sh_n_claims_top20error = drg_anytop20 / n_claims
	label variable sh_n_claims_top20error "share inpatient claims in top 20 error DRG"

	// non-emergent = groups 1, 2 and 7; emergent = every other top-20 claim
	// NOTE(review): the notes above also flag groups 9, 11 and 19 as elective -- confirm the split.
	generate drg_top20_nonemergent = drg_toperror1 + drg_toperror2 + drg_toperror7 
	generate drg_top20_emergent    = drg_anytop20 - drg_top20_nonemergent

	generate l_drg_top20_emergent    = ln(drg_top20_emergent)
	generate l_drg_top20_nonemergent = ln(drg_top20_nonemergent)

	// the panel already carries n_claims from the discharge file
	drop n_claims

	tempfile temp
	save `temp'
restore
// keep every master row; hospital-years found only in the CERT file are discarded
merge m:1 pn fyear using `temp', keep(master match) generate(_certerrordrgmerge)

// merge in ED pt outcome results
preserve
	use "${DataPath}/emergencyvisits/ptoutcomes_emergencyvisits_nooverlaps_fy_0715.dta", clear
	// purely numeric provider numbers only, converted to the numeric merge key pn
	drop if missing(real(prvdrnum))
	generate pn = real(prvdrnum)

	// outcome measures (die_ and revisit variables) plus the merge keys
	keep die_* revisit* pn fyear
	tempfile temp
	save `temp'
restore
// keep every master row; hospital-years found only in the outcomes file are discarded
merge m:1 pn fyear using `temp', keep(master match) generate(_EDptoutcomesmerge)


// write the finished hospital-year panel and close the build log
save "${DataPath}/derived/hospyear_0715.dta", replace

log close

